[flang][cuda] Data transfer with descriptor#114302
Closed
clementval wants to merge 5 commits intollvm:users/clementval/cuf_data_transfer_desc1from
Closed
[flang][cuda] Data transfer with descriptor#114302clementval wants to merge 5 commits intollvm:users/clementval/cuf_data_transfer_desc1from
clementval wants to merge 5 commits intollvm:users/clementval/cuf_data_transfer_desc1from
Conversation
Member
|
@llvm/pr-subscribers-flang-runtime Author: Valentin Clement (バレンタイン クレメン) (clementval) ChangesUse the feature added in #114301 to perform data transfer between data having a descriptor. Full diff: https://github.com/llvm/llvm-project/pull/114302.diff 2 Files Affected:
diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp
index 4778a4ae77683f..f25d3b531c84f0 100644
--- a/flang/runtime/CUDA/memory.cpp
+++ b/flang/runtime/CUDA/memory.cpp
@@ -9,10 +9,32 @@
#include "flang/Runtime/CUDA/memory.h"
#include "../terminator.h"
#include "flang/Runtime/CUDA/common.h"
+#include "flang/Runtime/assign.h"
#include "cuda_runtime.h"
namespace Fortran::runtime::cuda {
+static void *MemmoveHostToDevice(
+ void *dst, const void *src, std::size_t count) {
+ // TODO: Use cudaMemcpyAsync when we have support for stream.
+ CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice));
+ return dst;
+}
+
+static void *MemmoveDeviceToHost(
+ void *dst, const void *src, std::size_t count) {
+ // TODO: Use cudaMemcpyAsync when we have support for stream.
+ CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToHost));
+ return dst;
+}
+
+static void *MemmoveDeviceToDevice(
+ void *dst, const void *src, std::size_t count) {
+ // TODO: Use cudaMemcpyAsync when we have support for stream.
+ CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice));
+ return dst;
+}
+
extern "C" {
void *RTDEF(CUFMemAlloc)(
@@ -90,8 +112,16 @@ void RTDEF(CUFDataTransferPtrDesc)(void *addr, Descriptor *desc,
void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc,
unsigned mode, const char *sourceFile, int sourceLine) {
Terminator terminator{sourceFile, sourceLine};
- terminator.Crash(
- "not yet implemented: CUDA data transfer between two descriptors");
+ MemmoveFct memmoveFct;
+ if (mode == kHostToDevice) {
+ memmoveFct = &MemmoveHostToDevice;
+ } else if (mode == kDeviceToHost) {
+ memmoveFct = &MemmoveDeviceToHost;
+ } else if (mode == kDeviceToDevice) {
+ memmoveFct = &MemmoveDeviceToDevice;
+ }
+ Fortran::runtime::Assign(
+ dstDesc, srcDesc, terminator, MaybeReallocate, memmoveFct);
}
}
} // namespace Fortran::runtime::cuda
diff --git a/flang/unittests/Runtime/CUDA/Memory.cpp b/flang/unittests/Runtime/CUDA/Memory.cpp
index 157d3cdb531def..ade05e21b70a89 100644
--- a/flang/unittests/Runtime/CUDA/Memory.cpp
+++ b/flang/unittests/Runtime/CUDA/Memory.cpp
@@ -9,11 +9,17 @@
#include "flang/Runtime/CUDA/memory.h"
#include "gtest/gtest.h"
#include "../../../runtime/terminator.h"
+#include "../tools.h"
#include "flang/Common/Fortran.h"
+#include "flang/Runtime/CUDA/allocator.h"
#include "flang/Runtime/CUDA/common.h"
+#include "flang/Runtime/CUDA/descriptor.h"
+#include "flang/Runtime/allocatable.h"
+#include "flang/Runtime/allocator-registry.h"
#include "cuda_runtime.h"
+using namespace Fortran::runtime;
using namespace Fortran::runtime::cuda;
TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
@@ -29,3 +35,37 @@ TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
EXPECT_EQ(42, host);
RTNAME(CUFMemFree)((void *)dev, kMemTypeDevice, __FILE__, __LINE__);
}
+
+static OwningPtr<Descriptor> createAllocatable(
+ Fortran::common::TypeCategory tc, int kind, int rank = 1) {
+ return Descriptor::Create(TypeCode{tc, kind}, kind, nullptr, rank, nullptr,
+ CFI_attribute_allocatable);
+}
+
+TEST(MemoryCUFTest, CUFDataTransferDescDesc) {
+ using Fortran::common::TypeCategory;
+ RTNAME(CUFRegisterAllocator)();
+ // INTEGER(4), DEVICE, ALLOCATABLE :: a(:)
+ auto dev{createAllocatable(TypeCategory::Integer, 4)};
+ dev->SetAllocIdx(kDeviceAllocatorPos);
+ EXPECT_EQ((int)kDeviceAllocatorPos, dev->GetAllocIdx());
+ RTNAME(AllocatableSetBounds)(*dev, 0, 1, 10);
+ RTNAME(AllocatableAllocate)
+ (*dev, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+ EXPECT_TRUE(dev->IsAllocated());
+
+ // Create temp array to transfer to device.
+ auto x{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
+ std::vector<int32_t>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})};
+ RTNAME(CUFDataTransferDescDesc)(*dev, *x, kHostToDevice, __FILE__, __LINE__);
+
+ // Retrieve data from device.
+ auto host{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
+ std::vector<int32_t>{0, 0, 0, 0, 0, 0, 0, 0, 0, 0})};
+ RTNAME(CUFDataTransferDescDesc)(
+ *host, *dev, kDeviceToHost, __FILE__, __LINE__);
+
+ for (unsigned i = 0; i < 10; ++i) {
+ EXPECT_EQ(*host->ZeroBasedIndexedElement<std::int32_t>(i), (std::int32_t)i);
+ }
+}
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
Renaud-K
reviewed
Oct 30, 2024
Renaud-K
approved these changes
Oct 30, 2024
Contributor
Renaud-K
left a comment
There was a problem hiding this comment.
Looks good. Nice way of testing the runtime.
clementval
added a commit
that referenced
this pull request
Nov 1, 2024
smallp-o-p
pushed a commit
to smallp-o-p/llvm-project
that referenced
this pull request
Nov 3, 2024
Reopen PR llvm#114302 as it was automatically closed. Review in llvm#114302
NoumanAmir657
pushed a commit
to NoumanAmir657/llvm-project
that referenced
this pull request
Nov 4, 2024
Reopen PR llvm#114302 as it was automatically closed. Review in llvm#114302
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Use the feature added in #114301 to perform data transfer between data having a descriptor.